Performance Evaluation-II

Week 8 R-lab
Zhaohu(Jonathan) Fan

07/01/2021

Data Partitioning

Training & Test Sets

Data Example

Data Example (cont’d)

library(readxl)
library(quantmod)
setwd("C:/Users/fanzh/OneDrive - University of Cincinnati/UC_couse/000_Teaching_4090_SS21/uc-bana7052-master/docs/slides")
df <- read_xlsx(path = "SouvenirSales.xlsx")
Sales_ts<- ts(df$Sales, start = as.yearmon("1995-01"),end = as.yearmon("2001-12"),freq = 12)
plot(Sales_ts,type = "b",xlab= "Time (in Months)", ylab = "monthly sales ", main = "Time Series Plot of monthly sales from Jan 1995 - Dec 2001")

Data Example (cont’d)

# Data partition
library(tidyverse)
#The filter() function is used to subset a data frame, retaining all rows that satisfy your conditions. 
train <- filter(df, Date < as.Date("2001-01-01"))
test  <- filter(df, Date >= as.Date("2001-01-01"))
head(train, 12)
## # A tibble: 12 x 2
##    Date                 Sales
##    <dttm>               <dbl>
##  1 1995-01-01 00:00:00  1665.
##  2 1995-02-01 00:00:00  2398.
##  3 1995-03-01 00:00:00  2841.
##  4 1995-04-01 00:00:00  3547.
##  5 1995-05-01 00:00:00  3753.
##  6 1995-06-01 00:00:00  3715.
##  7 1995-07-01 00:00:00  4350.
##  8 1995-08-01 00:00:00  3566.
##  9 1995-09-01 00:00:00  5022.
## 10 1995-10-01 00:00:00  6423.
## 11 1995-11-01 00:00:00  7601.
## 12 1995-12-01 00:00:00 19756.
head(test, 12)
## # A tibble: 12 x 2
##    Date                  Sales
##    <dttm>                <dbl>
##  1 2001-01-01 00:00:00  10243.
##  2 2001-02-01 00:00:00  11267.
##  3 2001-03-01 00:00:00  21827.
##  4 2001-04-01 00:00:00  17357.
##  5 2001-05-01 00:00:00  15998.
##  6 2001-06-01 00:00:00  18602.
##  7 2001-07-01 00:00:00  26155.
##  8 2001-08-01 00:00:00  28587.
##  9 2001-09-01 00:00:00  30505.
## 10 2001-10-01 00:00:00  30821.
## 11 2001-11-01 00:00:00  46634.
## 12 2001-12-01 00:00:00 104661.

Data Example (cont’d)

train.ts <- ts(train["Sales"], start = c(1995, 1), frequency = 12)
test.ts <- ts(test["Sales"], start = c(2001, 1), frequency = 12)
head(train.ts, 12)
##          Sales
##  [1,]  1664.81
##  [2,]  2397.53
##  [3,]  2840.71
##  [4,]  3547.29
##  [5,]  3752.96
##  [6,]  3714.74
##  [7,]  4349.61
##  [8,]  3566.34
##  [9,]  5021.82
## [10,]  6423.48
## [11,]  7600.60
## [12,] 19756.21
head(test.ts, 12)
##           Sales
##  [1,]  10243.24
##  [2,]  11266.88
##  [3,]  21826.84
##  [4,]  17357.33
##  [5,]  15997.79
##  [6,]  18601.53
##  [7,]  26155.15
##  [8,]  28586.52
##  [9,]  30505.41
## [10,]  30821.33
## [11,]  46634.38
## [12,] 104660.67

Data Example (cont’d)

plot(train.ts,type = "b",xlab= "Time (in Months)", ylab = "monthly sales ", main = "Time Series Plot of monthly sales from Jan 1995 - Dec 2000")

Data Example (cont’d)

plot(test.ts,type = "b",xlab= "Time (in Months)", ylab = "monthly sales ", main = "Time Series Plot of monthly sales from Jan 2001 - Dec 2001")

Forecasting Method(s)

Naïve Model

 # Naïve Model
library(forecast)
naive.model<-naive(train.ts, 12)
summary(naive.model)
## 
## Forecast method: Naive method
## 
## Model Information:
## Call: naive(y = train.ts, h = 12) 
## 
## Residual sd: 10460.7283 
## 
## Error measures:
##                    ME     RMSE      MAE       MPE     MAPE    MASE       ACF1
## Training set 1113.477 10460.73 5506.879 -25.27554 61.16191 1.47054 -0.1968879
## 
## Forecasts:
##          Point Forecast    Lo 80     Hi 80     Lo 95    Hi 95
## Jan 2001       80721.71 67315.75  94127.67 60219.059 101224.4
## Feb 2001       80721.71 61762.82  99680.60 51726.583 109716.8
## Mar 2001       80721.71 57501.90 103941.52 45210.077 116233.3
## Apr 2001       80721.71 53909.78 107533.64 39716.409 121727.0
## May 2001       80721.71 50745.07 110698.35 34876.389 126567.0
## Jun 2001       80721.71 47883.94 113559.48 30500.677 130942.7
## Jul 2001       80721.71 45252.87 116190.55 26476.795 134966.6
## Aug 2001       80721.71 42803.92 118639.50 22731.457 138712.0
## Sep 2001       80721.71 40503.82 120939.60 19213.758 142229.7
## Oct 2001       80721.71 38328.33 123115.09 15886.636 145556.8
## Nov 2001       80721.71 36259.16 125184.26 12722.110 148721.3
## Dec 2001       80721.71 34282.09 127161.33  9698.445 151745.0

Seasonal Naïve Model

 # Seasonal Naïve Model
library(forecast)
snaive.model<-snaive(train.ts, 12)
snaive.model
##          Point Forecast      Lo 80    Hi 80      Lo 95    Hi 95
## Jan 2001        7615.03  -673.8117 15903.87 -5061.6594 20291.72
## Feb 2001        9849.69  1560.8483 18138.53 -2826.9994 22526.38
## Mar 2001       14558.40  6269.5583 22847.24  1881.7106 27235.09
## Apr 2001       11587.33  3298.4883 19876.17 -1089.3594 24264.02
## May 2001        9332.56  1043.7183 17621.40 -3344.1294 22009.25
## Jun 2001       13082.09  4793.2483 21370.93   405.4006 25758.78
## Jul 2001       16732.78  8443.9383 25021.62  4056.0906 29409.47
## Aug 2001       19888.61 11599.7683 28177.45  7211.9206 32565.30
## Sep 2001       23933.38 15644.5383 32222.22 11256.6906 36610.07
## Oct 2001       25391.35 17102.5083 33680.19 12714.6606 38068.04
## Nov 2001       36024.80 27735.9583 44313.64 23348.1106 48701.49
## Dec 2001       80721.71 72432.8683 89010.55 68045.0206 93398.40

Forecast Accuracy Measures

Assessing Residuals

When applying a forecasting method, it is important to always check that the residuals are well-behaved (i.e., no outliers or patterns) and resemble white noise.

Assessing Residuals (cont’d)

# measuring forecast accuracy
checkresiduals(naive.model)

## 
##  Ljung-Box test
## 
## data:  Residuals from Naive method
## Q* = 62.944, df = 14, p-value = 3.551e-08
## 
## Model df: 0.   Total lags used: 14

Assessing Residuals (cont’d)

# measuring forecast accuracy
checkresiduals(snaive.model)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 30.971, df = 14, p-value = 0.005596
## 
## Model df: 0.   Total lags used: 14

Forecast Accuracy Measures

Forecast Accuracy Measures (cont’d)

Forecast Accuracy Measures (cont’d)

# measuring forecast accuracy
accuracy(naive.model, test.ts)
##                      ME     RMSE       MAE        MPE      MAPE     MASE
## Training set   1113.477 10460.73  5506.879  -25.27554  61.16191  1.47054
## Test set     -50500.288 56099.07 54490.114 -287.13834 290.95050 14.55087
##                    ACF1 Theil's U
## Training set -0.1968879        NA
## Test set      0.3182456  6.649124

Forecast Accuracy Measures (cont’d)

# measuring forecast accuracy
accuracy(snaive.model, test.ts)
##                    ME     RMSE      MAE      MPE     MAPE     MASE      ACF1
## Training set 3401.361 6467.818 3744.801 22.39270 25.64127 1.000000 0.4140974
## Test set     7828.278 9542.346 7828.278 27.27926 27.27926 2.090439 0.2264895
##              Theil's U
## Training set        NA
## Test set     0.7373759

Perdicition Intervals

Data Example (cont’d): Prediction Intervals

 #  Naïve Model
autoplot(naive.model)

Perdicition Intervals (cont’d)

 # Naïve Model
library(forecast)
naive.model<-naive(train.ts, 12)
summary(naive.model)
## 
## Forecast method: Naive method
## 
## Model Information:
## Call: naive(y = train.ts, h = 12) 
## 
## Residual sd: 10460.7283 
## 
## Error measures:
##                    ME     RMSE      MAE       MPE     MAPE    MASE       ACF1
## Training set 1113.477 10460.73 5506.879 -25.27554 61.16191 1.47054 -0.1968879
## 
## Forecasts:
##          Point Forecast    Lo 80     Hi 80     Lo 95    Hi 95
## Jan 2001       80721.71 67315.75  94127.67 60219.059 101224.4
## Feb 2001       80721.71 61762.82  99680.60 51726.583 109716.8
## Mar 2001       80721.71 57501.90 103941.52 45210.077 116233.3
## Apr 2001       80721.71 53909.78 107533.64 39716.409 121727.0
## May 2001       80721.71 50745.07 110698.35 34876.389 126567.0
## Jun 2001       80721.71 47883.94 113559.48 30500.677 130942.7
## Jul 2001       80721.71 45252.87 116190.55 26476.795 134966.6
## Aug 2001       80721.71 42803.92 118639.50 22731.457 138712.0
## Sep 2001       80721.71 40503.82 120939.60 19213.758 142229.7
## Oct 2001       80721.71 38328.33 123115.09 15886.636 145556.8
## Nov 2001       80721.71 36259.16 125184.26 12722.110 148721.3
## Dec 2001       80721.71 34282.09 127161.33  9698.445 151745.0

Perdicition Intervals (cont’d)

 # Seasonal Naïve Model
library(forecast)
snaive.model<-snaive(train.ts, 12)
snaive.model
##          Point Forecast      Lo 80    Hi 80      Lo 95    Hi 95
## Jan 2001        7615.03  -673.8117 15903.87 -5061.6594 20291.72
## Feb 2001        9849.69  1560.8483 18138.53 -2826.9994 22526.38
## Mar 2001       14558.40  6269.5583 22847.24  1881.7106 27235.09
## Apr 2001       11587.33  3298.4883 19876.17 -1089.3594 24264.02
## May 2001        9332.56  1043.7183 17621.40 -3344.1294 22009.25
## Jun 2001       13082.09  4793.2483 21370.93   405.4006 25758.78
## Jul 2001       16732.78  8443.9383 25021.62  4056.0906 29409.47
## Aug 2001       19888.61 11599.7683 28177.45  7211.9206 32565.30
## Sep 2001       23933.38 15644.5383 32222.22 11256.6906 36610.07
## Oct 2001       25391.35 17102.5083 33680.19 12714.6606 38068.04
## Nov 2001       36024.80 27735.9583 44313.64 23348.1106 48701.49
## Dec 2001       80721.71 72432.8683 89010.55 68045.0206 93398.40

Training and Test Sets

Time Series Cross-Validation

Time Series Cross-Validation (cont’d)

This procedure is sometimes known as a “rolling forecasting origin” because the “origin” at which the forecast is based rolls forward in time as displayed by each row in the above illustration.

Time Series Cross-Validation (cont’d)

Time Series Cross-Validation (cont’d)

Example: Time Series Cross-Validation

 # Naïve Model
errors <- tsCV(Sales_ts, forecastfunction = naive, h = 1)
MSE=mean(errors^2, na.rm = TRUE)
MSE
## [1] 199536610
RMSE=sqrt(MSE)
RMSE
## [1] 14125.74

Example: Time Series Cross-Validation (cont’d)

 # Seasonal Naïve Model
errors <- tsCV(Sales_ts, forecastfunction = snaive, h = 1)
MSE=mean(errors^2, na.rm = TRUE)
MSE
## [1] 48481121
RMSE=sqrt(MSE)
RMSE
## [1] 6962.839

Time Series Cross-Validation (cont’d)

#Naïve Model
errors <- tsCV(Sales_ts, forecastfunction = snaive, h = 1)
MSE=mean(errors^2, na.rm = TRUE)
MSE
RMSE=sqrt(MSE)
RMSE

Loop in R

Loop in R: A toy example

with given as follows

Loop in R: Another example

Loop in R: Another example

sum<-0
for(i in 1:100){
  sum<-sum+1/i
}
sum
## [1] 5.187378

Time Series Cross-Validation (cont’d)

# create empty vector to hold MSE values
MSE <- vector("numeric", 10)
for(h in 1:10) {
  errors <- tsCV(Sales_ts, forecastfunction = naive, h = h)
  MSE[h] <- mean(errors^2, na.rm = TRUE)
}

MSE
##  [1] 199536610 235085506 241901650 252366669 264360766 276687642 284985104
##  [8] 289011592 290225493 295118525

Time Series Cross-Validation (cont’d)

# create empty vector to hold RMSE values
RMSE <- vector("numeric", 10)
for(h in 1:10) {
  errors <- tsCV(Sales_ts, forecastfunction = naive, h = h)
  RMSE[h] <- sqrt(mean(errors^2, na.rm = TRUE))
}
RMSE
##  [1] 14125.74 15332.50 15553.19 15886.05 16259.17 16633.93 16881.50 17000.34
##  [9] 17036.01 17179.01

Time series in R

x <- c(123,39,78,52,110)
yr <- 2012:2016
knitr::kable(data.frame(Year=yr,Observation=x), booktabs=TRUE)
Year Observation
2012 123
2013 39
2014 78
2015 52
2016 110
y <- ts(c(123,39,78,52,110), start=2012)
y
## Time Series:
## Start = 2012 
## End = 2016 
## Frequency = 1 
## [1] 123  39  78  52 110

Time series in R (cont’d)

data(AirPassengers)
AirPassengers_ts<- ts(AirPassengers, start = as.yearmon("1949-01"),end = as.yearmon("1960-12"),freq = 12)
test.ts <- ts(AirPassengers, start=c(1949, 1), end=c(1950,12), frequency = 12)
test.ts 
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
# a good  R code
test1<-window(AirPassengers, start =c(1949,1),end =c(1950,12),freq = 12)
test1
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
# a  bad R code: receive warning messages!
test2<- window(AirPassengers, start = as.yearmon("1949-01"),end = as.yearmon("1950-12"),freq = 12)
test2
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140